##################################################
## Replication materials for 
## "Competent legislators or mere pawns? 
## Experimental evidence of attitudes toward gender quota politicians"
## by Carolyn Barnett, Alexandra Blackman, and Marwa Shalaby

## 3. Appendix Figures: Power Analyses
## This file creates Appendix Table A.2 and Figures A.1 and A.2

## NOTES: 
# - See file "01_setup.R" to load necessary packages
# - All functions must be established before attempting to create figures

# Table A.2 --------------------------------

## This table was manually compiled after running the simulations
## and generating the figures below. Values in the table reflect
## where the simulations show power crossing the thresholds of 
## 0.8 and 0.95. Refer to the data generated for the figures below.


# Set global values for simulations ---------------------------

set.seed(20472) # for reproducibility

n <- 900 # of 1800 respondnets, half randomly assigned to this experiment
alpha <- 0.05 # threshold p-val
split <- c(0.5,0.25,0.25) # probability of assignment to each treatment

# treatment variable names
att_names <- c("gender_how", # gender of the politician and how elected
               "collab") # cross-party vs. intra-party collaboration

# Sample-generating functions ----------------

# set up the data frame
make_sample <- function(outcome_name){
  # names for columns 
  samp_names <- c("UID", 
                  att_names,
                  # placeholders for outcome variables
                  outcome_name)
  
  # initialize data frame with n rows, number of columns by names
  samp <- data.frame(matrix(nrow = n, 
                            ncol = length(samp_names)))
  names(samp) <- samp_names
  
  # generate unique player IDs 
  samp$UID <- 1:900
  
  return(samp)
}

# randomly generate treatment assignments
# samp = the sample data frame generated in make_sample()
# split = probability of assignment to each politician gender-mode treatment
make_treatments <- function(samp, split){
  # generate treatment assignments 
  samp <- samp %>%
    mutate(
      # split = how we will split the sample 
      # 50% to see quota women
      # 25% each to see non-quota women and men
      gender_how = sample.int(3, n, replace = T,
                              prob = split),
      # 50% probability of seeing cross-party vs. intra-party collab
      collab = sample.int(2, n, replace = T)
    )
  return(samp)
}


# Outcome-generating functions -----------------------

# generate two distributions 1-10 with a difference in means
# (in expectation) of the desired effect size
make_outcomes_rating <- function(N1, N2, effect_size){
  S1 = N1 * 5 # desired mean
  d1 <- tabulate(sample.int(10*N1, S1-N1, 
                            replace = T) %% N1 + 1, N1) + 1
  d1 <- ifelse(d1 > 10, 10, d1) # truncate to 1-10
  
  S2 = N2 * (5 + effect_size)
  d2 <- tabulate(sample.int(10*N2, S2-N2, 
                            replace = T) %% N2 + 1, N2) + 1
  d2 <- ifelse(d2 > 10, 10, d2) # truncate to 1-10
  
  actual_diff <- mean(d2) - mean(d1)
  return(list(d1, d2, actual_diff))
}

# test the function
test <- make_outcomes_rating(1000, 1178, 0.5)
mean(test[[1]])
table(test[[1]]) 
mean(test[[2]])
table(test[[2]])
test[[3]] 
# actual difference will vary slightly with sample size differences
# and value truncation

# Plotting functions -----------------

# extract results from simulation output object
# and combine with the potential effect sizes tested
# result list = output of the simulation function
# values_vector = potential effect sizes tested
extract_results <- function(result_list, values_vector){
  
  result_vector <- c(NA)
  
  for (i in 1:length(result_list)){
    result_vector[i] <- result_list[[i]][1]
  }
  
  result_vector <- as.data.frame(result_vector)
  pd <- data.frame(values_vector, result_vector$result_vector)
  
  return(pd)
}

# plot the results
# pd = dataframe returned from extract_results()
# x_label = x-asix label
plot_results <- function(pd, x_label){
  
  pwr_data <- pd %>% 
    dplyr::rename(Difference = 1,
                  Power = 2) 
  
  pwr_plot <- ggplot(pwr_data, aes(x = Difference,
                                   y = Power)) +
    geom_point() +
    geom_line(size = 2) +
    geom_hline(yintercept = 0.8, linetype = 3) + 
    geom_hline(yintercept = 0.95, linetype = 3) + 
    theme_bw() +
    labs(x = x_label,
         y = "Power") +
    scale_x_continuous(breaks = pwr_data$Difference,
                       labels = as.character(pwr_data$Difference)) +
    scale_y_continuous(limits = c(0,1),
                       breaks = c(0.2,0.4,0.6,0.8,0.95),
                       labels = c("0.2","0.4","0.6",
                                  "0.8","0.95")) +
    theme_cb()
  
  return(pwr_plot)
}




# Figure A.1 ----------------------

## Shows the power of our design to detect different effect sizes 
## in a comparison of ratings of quota-elected women compared to 
## either of the other two groups.

## Simulation function ---------------

h1_rate <- function(n, alpha, split,
                    rate_diff){  # effect size 
  
  # initialize vectors for output of loops
  p <- c(NA) # p-value 
  p_sig <- c(NA) # dummy variable for significant p-value
  
  ## Loop for simulations
  for(i in 1:1000){
    ## Part 1: DF set-up
    samp <- make_sample(outcome_name = "rating")
    samp <- make_treatments(samp, split = split)
    
    # Part 2: Generate outcome variables from random distributions
    # hypothesis is that quota women will have *lower* rating
    rate_distrib <- make_outcomes_rating(N1 = sum(samp$gender_how == 1),
                                         N2 = sum(samp$gender_how == 2),
                                         effect = rate_diff)
    samp$rating[samp$gender_how == 1] <- rate_distrib[[1]]
    samp$rating[samp$gender_how == 2] <- rate_distrib[[2]]
    
    # Part 3: Calculate quantities of interest and save p-values
    
    # conduct t-test and save p-value
    result <- t.test(samp$rating[samp$gender_how == 1],
                     samp$rating[samp$gender_how == 2],
                     alternative = "less")
    result
    
    p[i] <- result$p.value
    p_sig[i] <- ifelse(p[i] <= alpha, 1, 0)
    
  }
  
  ## After running loops: 
  # Calculate proportion of significant p-values and return
  sig_prop <- sum(p_sig) / 1000
  
  return(sig_prop)
}

# test the function
h1_rate(n = n, 
        alpha = alpha, 
        rate_diff = 0.3, 
        split = split)


## Run simulations --------------

h1_rate_diff_vals <- seq(from = 0.2, to = 0.4, by = 0.02) # values to test

start_time <- Sys.time()
h1_rate_results <- list()
for(i in 1:length(h1_rate_diff_vals)){
  h1_rate_results[[i]] <- h1_rate(n = n, 
                                  alpha = alpha, 
                                  rate_diff = h1_rate_diff_vals[i],
                                  split = split)
}
end_time <- Sys.time()

## Plot and Examine Results -------------------------

pd <- extract_results(result_list = h1_rate_results,
                      values_vector = h1_rate_diff_vals)

pwr_plot <- plot_results(pd = pd,
                         x_label = "Hypothetical Difference in Means")
pwr_plot

ggsave("h1_rate_test.png", plot = last_plot(),
       path = "Figures/", limitsize = F,
       width = 7.6, height = 5, units = "in", dpi =300)

# Get values for Table A.2
pd # power > 0.8 at effect size 0.3, power > 0.95 at 0.32

# Figure A.2 ----------------------

## Shows the power of our design to detect different effect 
## sizes in a comparison, among quota-elected women, of those 
## described as working with other political parties vs. 
## their co-partisans.

## Simulation function --------------

h2_rate <- function(n, alpha, split,
                    rate_diff){  # effect size 
  
  # initialize vectors for output of loops
  p <- c(NA) # p-value 
  p_sig <- c(NA) # dummy variable for significant p-value
  
  ## Loop for simulations
  for(i in 1:1000){
    ## Part 1: DF set-up
    samp <- make_sample(outcome_name = "rating")
    samp <- make_treatments(samp, split = split)
    
    # Part 2: Generate outcome variables from random distributions
    # note: hypothesis is that pawn rating is *higher* for quota women
    rate_distrib <- make_outcomes_rating(N1 = sum(samp$gender_how == 1 & 
                                                    samp$collab == 1),
                                         N2 = sum(samp$gender_how == 1 & 
                                                    samp$collab == 2),
                                         effect = rate_diff)
    
    samp$rating[samp$gender_how == 1 & 
                  samp$collab == 1] <- rate_distrib[[1]]
    samp$rating[samp$gender_how == 1 & 
                  samp$collab == 2] <- rate_distrib[[2]]
    
    # Part 3: Calculate quantities of interest and save p-values
    
    # conduct t-test and save p-value
    result <- t.test(samp$rating[samp$gender_how == 1 & 
                                   samp$collab == 1],
                     samp$rating[samp$gender_how == 1 & 
                                   samp$collab == 2],
                     alternative = "less")
    result
    
    p[i] <- result$p.value
    p_sig[i] <- ifelse(p[i] <= alpha, 1, 0)
  }
  
  ## After running loops: 
  # Calculate proportion of significant p-values and return
  sig_prop <- sum(p_sig) / 1000
  
  return(sig_prop)
}

# test the function
h2_rate(n = n, 
        alpha = alpha, 
        rate_diff = 0.3, 
        split = split)

## Run simulations --------------

h2_rate_diff_vals <- seq(from = 0.2, to = 0.4, by = 0.02) # values to test

start_time <- Sys.time()
h2_rate_results <- list()
for(i in 1:length(h2_rate_diff_vals)){
  h2_rate_results[[i]] <- h2_rate(n = n, 
                                  alpha = alpha, 
                                  rate_diff = h2_rate_diff_vals[i],
                                  split = split)
}
end_time <- Sys.time()

## Plot and Examine Results -------------------------

pd <- extract_results(result_list = h2_rate_results,
                      values_vector = h2_rate_diff_vals)

pwr_plot <- plot_results(pd = pd,
                         x_label = "Hypothetical Difference in Means")
pwr_plot

ggsave("h2_rate_test.png", plot = last_plot(),
       path = "Figures/", limitsize = F,
       width = 7.6, height = 5, units = "in", dpi =300)

# Get values for Table A.2
pd # power > 0.8 at effect size 0.34, power > 0.95 at 0.36
